AGA Toolkit '97

home *** CD-ROM | disk | FTP | other *** search

/ AGA Toolkit '97 / The AGA Toolkit '97.iso / miscellaneous / science / maths / calc / source / zmul.c < prev

Wrap

C/C++ Source or Header | 1996-09-07 | 29.5 KB | 1,106 lines

/* * Copyright (c) 1994 David I. Bell * Permission is granted to use, distribute, or modify this source, * provided that this copyright notice remains intact. * * Faster than usual multiplying and squaring routines. * The algorithm used is the reasonably simple one from Knuth, volume 2, * section 4.3.3. These recursive routines are of speed O(N^1.585) * instead of O(N^2). The usual multiplication and (almost usual) squaring * algorithms are used for small numbers. On a 386 with its compiler, the * two algorithms are equal in speed at about 100 decimal digits. */ #include "zmath.h" LEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */ LEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */ static HALF *tempbuf; /* temporary buffer for multiply and square */ static LEN domul MATH_PROTO((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans)); static LEN dosquare MATH_PROTO((HALF *vp, LEN size, HALF *ans)); /* * Multiply two numbers using the following formula recursively: * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * where S is a power of 2^16, and so multiplies by it are shifts, and * A,B,C,D are the left and right halfs of the numbers to be multiplied. */ void zmul(z1, z2, res) ZVALUE z1, z2; /* numbers to multiply */ ZVALUE *res; /* result of multiplication */ { LEN len; /* size of array */ if (ziszero(z1) || ziszero(z2)) { *res = _zero_; return; } if (zisunit(z1)) { zcopy(z2, res); res->sign = (z1.sign != z2.sign); return; } if (zisunit(z2)) { zcopy(z1, res); res->sign = (z1.sign != z2.sign); return; } /* * Allocate a temporary buffer for the recursion levels to use. * An array needs to be allocated large enough for all of the * temporary results to fit in. This size is about twice the size * of the largest original number, since each recursion level uses * the size of its given number, and whose size is 1/2 the size of * the previous level. The sum of the infinite series is 2. * Add some extra words because of rounding when dividing by 2 * and also because of the extra word that each multiply needs. */ len = z1.len; if (len < z2.len) len = z2.len; len = 2 * len + 64; tempbuf = zalloctemp(len); res->sign = (z1.sign != z2.sign); res->v = alloc(z1.len + z2.len + 1); res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v); } /* * Recursive routine to multiply two numbers by splitting them up into * two numbers of half the size, and using the results of multiplying the * subpieces. The result is placed in the indicated array, which must be * large enough for the result plus one extra word (size1 + size2 + 1). * Returns the actual size of the result with leading zeroes stripped. * This also uses a temporary array which must be twice as large as * one more than the size of the number at the top level recursive call. */ static LEN domul(v1, size1, v2, size2, ans) HALF *v1; /* first number */ LEN size1; /* size of first number */ HALF *v2; /* second number */ LEN size2; /* size of second number */ HALF *ans; /* location for result */ { LEN shift; /* amount numbers are shifted by */ LEN sizeA; /* size of left half of first number */ LEN sizeB; /* size of right half of first number */ LEN sizeC; /* size of left half of second number */ LEN sizeD; /* size of right half of second number */ LEN sizeAB; /* size of subtraction of A and B */ LEN sizeDC; /* size of subtraction of D and C */ LEN sizeABDC; /* size of product of above two results */ LEN subsize; /* size of difference of halfs */ LEN copysize; /* size of number left to copy */ LEN sizetotal; /* total size of product */ LEN len; /* temporary length */ HALF *baseA; /* base of left half of first number */ HALF *baseB; /* base of right half of first number */ HALF *baseC; /* base of left half of second number */ HALF *baseD; /* base of right half of second number */ HALF *baseAB; /* base of result of subtraction of A and B */ HALF *baseDC; /* base of result of subtraction of D and C */ HALF *baseABDC; /* base of product of above two results */ HALF *baseAC; /* base of product of A and C */ HALF *baseBD; /* base of product of B and D */ FULL carry; /* carry digit for small multiplications */ FULL carryACBD; /* carry from addition of A*C and B*D */ FULL digit; /* single digit multiplying by */ HALF *temp; /* base for temporary calculations */ BOOL neg; /* whether imtermediate term is negative */ register HALF *hd, *h1=NULL, *h2=NULL; /* for inner loops */ SIUNION sival; /* for addition of digits */ /* * Trim the numbers of leading zeroes and initialize the * estimated size of the result. */ hd = &v1[size1 - 1]; while ((*hd == 0) && (size1 > 1)) { hd--; size1--; } hd = &v2[size2 - 1]; while ((*hd == 0) && (size2 > 1)) { hd--; size2--; } sizetotal = size1 + size2; /* * First check for zero answer. */ if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) { *ans = 0; return 1; } /* * Exchange the two numbers if necessary to make the number of * digits of the first number be greater than or equal to the * second number. */ if (size1 < size2) { len = size1; size1 = size2; size2 = len; hd = v1; v1 = v2; v2 = hd; } /* * If the smaller number has only a few digits, then calculate * the result in the normal manner in order to avoid the overhead * of the recursion for small numbers. The number of digits where * the algorithm changes is settable from 2 to maxint. */ if (size2 < _mul2_) { /* * First clear the top part of the result, and then multiply * by the lowest digit to get the first partial sum. Later * products will then add into this result. */ hd = &ans[size1]; len = size2; while (len--) *hd++ = 0; digit = *v2++; h1 = v1; hd = ans; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } *hd = (HALF)carry; /* * Now multiply by the remaining digits of the second number, * adding each product into the final result. */ h2 = ans; while (--size2 > 0) { digit = *v2++; h1 = v1; hd = ++h2; if (digit == 0) continue; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } } /* * Now return the true size of the number. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } return len; } /* * Need to multiply by a large number. * Allocate temporary space for calculations, and calculate the * value for the shift. The shift value is 1/2 the size of the * larger (first) number (rounded up). The amount of temporary * space needed is twice the size of the shift, plus one more word * for the multiply to use. */ shift = (size1 + 1) / 2; temp = tempbuf; tempbuf += (2 * shift) + 1; /* * Determine the sizes and locations of all the numbers. * The value of sizeC can be negative, and this is checked later. * The value of sizeD is limited by the full size of the number. */ baseA = v1 + shift; baseB = v1; /* * XXX - Saber-C Version 3.1 says: * * W#26, Storing a bad pointer into auto variable dmul`baseC. * * This warning is issued during the regression test #026 * (read cryrand). * * Saver-C claims that v2+shift is past the end of allocated * memory for v2. When this warning is first issued, shift * has the value 51. * * The call stack is: * * domul(0x165ca0, 101, 0x160998, 40, 0x16d0a8) at "zmul.c":315 * zmul(0x2ddf88, 0x2ddf8c, 0x2ddc98) at "zmul.c":73 * qsqrt(0x38defc, 0x38ea94) at "qfunc.c":248 * qln(0x38defc, 0x38de70) at "qtrans.c":589 * qpower(0x38eacc, 0x38a398, 0x38a018) at "qtrans.c":657 * powervalue(0x1740f8,0x174118,0x195234,0x19523c) at "value.c":1009 * f_power(2, 0x533278) at "func.c":1188 * builtinfunc(117, 2, 0x174118) at "func.c":354 * o_call(0x5328d8, 117, 2) at "opcodes.c":2094 * calculate(0x5328d8, 1) at "opcodes.c":288 * o_usercall(0x5328d8, 54, 1) at "opcodes.c":2082 * calculate(0x48ffa8, 4) at "opcodes.c":288 * o_usercall(0x48ffa8, 53, 1) at "opcodes.c":2082 * calculate(0x1652a0, 1) at "opcodes.c":288 * o_usercall(0x1652a0, 57, 1) at "opcodes.c":2082 * calculate(0x16cca8, 0) at "opcodes.c":288 * evaluate(0) at "codegen.c":167 * getcommands(0) at "codegen.c":106 * getcommands(0) at "codegen.c":76 * getcommands(1) at "codegen.c":76 * main(-1, 0x181f8c) at "calc.c":155 * * Purify does not complain about this code. Who is right? */ baseC = v2 + shift; baseD = v2; baseAB = ans; baseDC = ans + shift; baseAC = ans + shift * 2; baseBD = ans; sizeA = size1 - shift; sizeC = size2 - shift; sizeB = shift; hd = &baseB[shift - 1]; while ((*hd == 0) && (sizeB > 1)) { hd--; sizeB--; } sizeD = shift; if (sizeD > size2) sizeD = size2; hd = &baseD[sizeD - 1]; while ((*hd == 0) && (sizeD > 1)) { hd--; sizeD--; } /* * If the smaller number has a high half of zero, then calculate * the result by breaking up the first number into two numbers * and combining the results using the obvious formula: * (A*S+B) * D = (A*D)*S + B*D */ if (sizeC <= 0) { len = domul(baseB, sizeB, baseD, sizeD, ans); hd = &ans[len]; len = sizetotal - len; while (len--) *hd++ = 0; /* * Add the second number into the first number, shifted * over at the correct position. */ len = domul(baseA, sizeA, baseD, sizeD, temp); h1 = temp; hd = ans + shift; carry = 0; while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Determine the final size of the number and return it. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } tempbuf = temp; return len; } /* * Now we know that the high halfs of the numbers are nonzero, * so we can use the complete formula. * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D. * The steps are done in the following order: * A-B * D-C * (A-B)*(D-C) * S^2*A*C + B*D * (S^2+S)*A*C + (S+1)*B*D (*) * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * * Note: step (*) above can produce a result which is larger than * the final product will be, and this is where the extra word * needed in the product comes from. After the final subtraction is * done, the result fits in the expected size. Using the extra word * is easier than suppressing the carries and borrows everywhere. * * Begin by forming the product (A-B)*(D-C) into a temporary * location that we save until the final step. Do each subtraction * at positions 0 and S. Be very careful about the relative sizes * of the numbers since this result can be negative. For the first * step calculate the absolute difference of A and B into a temporary * location at position 0 of the result. Negate the sign if A is * smaller than B. */ neg = FALSE; if (sizeA == sizeB) { len = sizeA; h1 = &baseA[len - 1]; h2 = &baseB[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) { h1 = baseA; h2 = baseB; sizeAB = sizeA; subsize = sizeB; } else { neg = !neg; h1 = baseB; h2 = baseA; sizeAB = sizeB; subsize = sizeA; } copysize = sizeAB - subsize; hd = baseAB; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } hd = &baseAB[sizeAB - 1]; while ((*hd == 0) && (sizeAB > 1)) { hd--; sizeAB--; } /* * This completes the calculation of abs(A-B). For the next step * calculate the absolute difference of D and C into a temporary * location at position S of the result. Negate the sign if C is * larger than D. */ if (sizeC == sizeD) { len = sizeC; h1 = &baseC[len - 1]; h2 = &baseD[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) { neg = !neg; h1 = baseC; h2 = baseD; sizeDC = sizeC; subsize = sizeD; } else { h1 = baseD; h2 = baseC; sizeDC = sizeD; subsize = sizeC; } copysize = sizeDC - subsize; hd = baseDC; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } hd = &baseDC[sizeDC - 1]; while ((*hd == 0) && (sizeDC > 1)) { hd--; sizeDC--; } /* * This completes the calculation of abs(D-C). Now multiply * together abs(A-B) and abs(D-C) into a temporary location, * which is preserved until the final steps. */ baseABDC = temp; sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC); /* * Now calculate B*D and A*C into one of their two final locations. * Make sure the high order digits of the products are zeroed since * this initializes the final result. Be careful about this zeroing * since the size of the high order words might be smaller than * the shift size. Do B*D first since the multiplies use one more * word than the size of the product. Also zero the final extra * word in the result for possible carries to use. */ len = domul(baseB, sizeB, baseD, sizeD, baseBD); hd = &baseBD[len]; len = shift * 2 - len; while (len--) *hd++ = 0; len = domul(baseA, sizeA, baseC, sizeC, baseAC); hd = &baseAC[len]; len = sizetotal - shift * 2 - len + 1; while (len--) *hd++ = 0; /* * Now add in A*C and B*D into themselves at the other shifted * position that they need. This addition is tricky in order to * make sure that the two additions cannot interfere with each other. * Therefore we first add in the top half of B*D and the lower half * of A*C. The sources and destinations of these two additions * overlap, and so the same answer results from the two additions, * thus only two pointers suffice for both additions. Keep the * final carry from these additions for later use since we cannot * afford to change the top half of A*C yet. */ h1 = baseBD + shift; h2 = baseAC; carryACBD = 0; len = shift; while (len--) { sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD; *h1++ = sival.silow; *h2++ = sival.silow; carryACBD = sival.sihigh; } /* * Now add in the bottom half of B*D and the top half of A*C. * These additions are straightforward, except that A*C should * be done first because of possible carries from B*D, and the * top half of A*C might not exist. Add in one of the carries * from the previous addition while we are at it. */ h1 = baseAC + shift; hd = baseAC; carry = carryACBD; len = sizetotal - 3 * shift; while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } h1 = baseBD; hd = baseBD + shift; carry = 0; len = shift; while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Now finally add in the other delayed carry from the * above addition. */ hd = baseAC + shift; while (carryACBD) { sival.ivalue = ((FULL) *hd) + carryACBD; *hd++ = sival.silow; carryACBD = sival.sihigh; } /* * Now finally add or subtract (A-B)*(D-C) into the final result at * the correct position (S), according to whether it is positive or * negative. When subtracting, the answer cannot go negative. */ h1 = baseABDC; hd = ans + shift; carry = 0; len = sizeABDC; if (neg) { while (len--) { sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = BASE1 - ((FULL) *hd) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } } else { while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } } /* * Finally determine the size of the final result and return that. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } tempbuf = temp; return len; } /* * Square a number by using the following formula recursively: * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2 * where S is a power of 2^16, and so multiplies by it are shifts, * and A and B are the left and right halfs of the number to square. */ void zsquare(z, res) ZVALUE z, *res; { LEN len; if (ziszero(z)) { *res = _zero_; return; } if (zisunit(z)) { *res = _one_; return; } /* * Allocate a temporary array if necessary for the recursion to use. * The array needs to be allocated large enough for all of the * temporary results to fit in. This size is about 3 times the * size of the original number, since each recursion level uses 3/2 * of the size of its given number, and whose size is 1/2 the size * of the previous level. The sum of the infinite series is 3. * Allocate some extra words for rounding up the sizes. */ len = 3 * z.len + 32; tempbuf = zalloctemp(len); res->sign = 0; res->v = alloc((z.len+1) * 2); res->len = dosquare(z.v, z.len, res->v); } /* * Recursive routine to square a number by splitting it up into two numbers * of half the size, and using the results of squaring the subpieces. * The result is placed in the indicated array, which must be large * enough for the result (size * 2). Returns the size of the result. * This uses a temporary array which must be 3 times as large as the * size of the number at the top level recursive call. */ static LEN dosquare(vp, size, ans) HALF *vp; /* value to be squared */ LEN size; /* length of value to square */ HALF *ans; /* location for result */ { LEN shift; /* amount numbers are shifted by */ LEN sizeA; /* size of left half of number to square */ LEN sizeB; /* size of right half of number to square */ LEN sizeAA; /* size of square of left half */ LEN sizeBB; /* size of square of right half */ LEN sizeAABB; /* size of sum of squares of A and B */ LEN sizeAB; /* size of difference of A and B */ LEN sizeABAB; /* size of square of difference of A and B */ LEN subsize; /* size of difference of halfs */ LEN copysize; /* size of number left to copy */ LEN sumsize; /* size of sum */ LEN sizetotal; /* total size of square */ LEN len; /* temporary length */ LEN len1; /* another temporary length */ FULL carry; /* carry digit for small multiplications */ FULL digit; /* single digit multiplying by */ HALF *temp; /* base for temporary calculations */ HALF *baseA; /* base of left half of number */ HALF *baseB; /* base of right half of number */ HALF *baseAA; /* base of square of left half of number */ HALF *baseBB; /* base of square of right half of number */ HALF *baseAABB; /* base of sum of squares of A and B */ HALF *baseAB; /* base of difference of A and B */ HALF *baseABAB; /* base of square of difference of A and B */ register HALF *hd, *h1, *h2, *h3; /* for inner loops */ SIUNION sival; /* for addition of digits */ /* * First trim the number of leading zeroes. */ hd = &vp[size - 1]; while ((*hd == 0) && (size > 1)) { size--; hd--; } sizetotal = size + size; /* * If the number has only a small number of digits, then use the * (almost) normal multiplication method. Multiply each halfword * only by those halfwards further on in the number. Missed terms * will then be the same pairs of products repeated, and the squares * of each halfword. The first case is handled by doubling the * result. The second case is handled explicitly. The number of * digits where the algorithm changes is settable from 2 to maxint. */ if (size < _sq2_) { hd = ans; len = sizetotal; while (len--) *hd++ = 0; h2 = vp; hd = ans + 1; for (len = size; len--; hd += 2) { digit = (FULL) *h2++; if (digit == 0) continue; h3 = h2; h1 = hd; carry = 0; len1 = len; while (len1 >= 4) { /* expand the loop some */ len1 -= 4; sival.ivalue = (digit * ((FULL) *h3++)) + ((FULL) *h1) + carry; *h1++ = sival.silow; sival.ivalue = (digit * ((FULL) *h3++)) + ((FULL) *h1) + ((FULL) sival.sihigh); *h1++ = sival.silow; sival.ivalue = (digit * ((FULL) *h3++)) + ((FULL) *h1) + ((FULL) sival.sihigh); *h1++ = sival.silow; sival.ivalue = (digit * ((FULL) *h3++)) + ((FULL) *h1) + ((FULL) sival.sihigh); *h1++ = sival.silow; carry = sival.sihigh; } while (len1--) { sival.ivalue = (digit * ((FULL) *h3++)) + ((FULL) *h1) + carry; *h1++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *h1) + carry; *h1++ = sival.silow; carry = sival.sihigh; } } /* * Now double the result. * There is no final carry to worry about because we * handle all digits of the result which must fit. */ carry = 0; hd = ans; len = sizetotal; while (len--) { digit = ((FULL) *hd); sival.ivalue = digit + digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Now add in the squares of each halfword. */ carry = 0; hd = ans; h3 = vp; len = size; while (len--) { digit = ((FULL) *h3++); sival.ivalue = digit * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Finally return the size of the result. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { len--; hd--; } return len; } /* * The number to be squared is large. * Allocate temporary space and determine the sizes and * positions of the values to be calculated. */ temp = tempbuf; tempbuf += (3 * (size + 1) / 2); sizeA = size / 2; sizeB = size - sizeA; shift = sizeB; baseA = vp + sizeB; baseB = vp; baseAA = &ans[shift * 2]; baseBB = ans; baseAABB = temp; baseAB = temp; baseABAB = &temp[shift]; /* * Trim the second number of leading zeroes. */ hd = &baseB[sizeB - 1]; while ((*hd == 0) && (sizeB > 1)) { sizeB--; hd--; } /* * Now to proceed to calculate the result using the formula. * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2. * The steps are done in the following order: * S^2*A^2 + B^2 * A^2 + B^2 * (S^2+S)*A^2 + (S+1)*B^2 * (A-B)^2 * (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2. * * Begin by forming the squares of two the halfs concatenated * together in the final result location. Make sure that the * highest words of the results are zero. */ sizeBB = dosquare(baseB, sizeB, baseBB); hd = &baseBB[sizeBB]; len = shift * 2 - sizeBB; while (len--) *hd++ = 0; sizeAA = dosquare(baseA, sizeA, baseAA); hd = &baseAA[sizeAA]; len = sizetotal - shift * 2 - sizeAA; while (len--) *hd++ = 0; /* * Sum the two squares into a temporary location. */ if (sizeAA >= sizeBB) { h1 = baseAA; h2 = baseBB; sizeAABB = sizeAA; sumsize = sizeBB; } else { h1 = baseBB; h2 = baseAA; sizeAABB = sizeBB; sumsize = sizeAA; } copysize = sizeAABB - sumsize; hd = baseAABB; carry = 0; while (sumsize--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = ((FULL) *h1++) + carry; *hd++ = sival.silow; carry = sival.sihigh; } if (carry) { *hd = (HALF)carry; sizeAABB++; } /* * Add the sum back into the previously calculated squares * shifted over to the proper location. */ h1 = baseAABB; hd = ans + shift; carry = 0; len = sizeAABB; while (len--) { sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { /* * XXX - Saber-C Version 3.1 says: * * W#113, Using allocated data what has not been set. * * This warning is issued during the regression test #622 * (algcheck 1). * * Saber-C claims that *hd has been allocated but not set. * When this warning was first issued, hd == 0x19547c * which is 12 bytes off from ans. The value of carry * was 1, the value of shift was 2, len was -1 and sizeAABB * was 4, sizeBB as 4, sizeAA was 2. The value stored * in *hd was 0xbfbf. * * The call stack is: * * dosquare(0x183e88, 3, 0x195470) at "zmul.c":981 * zsquare(0x1952b0, 0x4c1348) at "zmul.c":687 * zpowermod(0x5d1d58,0x5d1d5c,0x5d1d60,0x390218) at "zmod.c":679 * qpowermod(0x38a088, 0x3901a8, 0x38a964) at "qfunc.c":76 * builtinfunc(113, 3, 0x1740f0) at "func.c":386 * o_call(0x3b5de0, 113, 3) at "opcodes.c":2094 * calculate(0x3b5de0, 3) at "opcodes.c":288 * o_usercall(0x3b5de0, 13, 3) at "opcodes.c":2082 * calculate(0x47dfa8, 0) at "opcodes.c":288 * o_usercall(0x47dfa8, 14, 0) at "opcodes.c":2082 * calculate(0x16cca8, 0) at "opcodes.c":288 * evaluate(0) at "codegen.c":167 * getcommands(0) at "codegen.c":106 * getcommands(1) at "codegen.c":76 * main(-1, 0x181f8c) at "calc.c":155 * * Purify does not report this. Who is right? */ sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Calculate the absolute value of the difference of the two halfs * into a temporary location. */ if (sizeA == sizeB) { len = sizeA; h1 = &baseA[len - 1]; h2 = &baseB[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) { h1 = baseA; h2 = baseB; sizeAB = sizeA; subsize = sizeB; } else { h1 = baseB; h2 = baseA; sizeAB = sizeB; subsize = sizeA; } copysize = sizeAB - subsize; hd = baseAB; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } hd = &baseAB[sizeAB - 1]; while ((*hd == 0) && (sizeAB > 1)) { sizeAB--; hd--; } /* * Now square the number into another temporary location, * and subtract that from the final result. */ sizeABAB = dosquare(baseAB, sizeAB, baseABAB); h1 = baseABAB; hd = ans + shift; carry = 0; while (sizeABAB--) { sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = BASE1 - ((FULL) *hd) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } /* * Return the size of the result. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { len--; hd--; } tempbuf = temp; return len; } /* * Return a pointer to a buffer to be used for holding a temporary number. * The buffer will be at least as large as the specified number of HALFs, * and remains valid until the next call to this routine. The buffer cannot * be freed by the caller. There is only one temporary buffer, and so as to * avoid possible conflicts this is only used by the lowest level routines * such as divide, multiply, and square. */ HALF * zalloctemp(len) LEN len; /* required number of HALFs in buffer */ { HALF *hp; static LEN buflen; /* current length of temp buffer */ static HALF *bufptr; /* pointer to current temp buffer */ if (len <= buflen) return bufptr; /* * We need to grow the temporary buffer. * First free any existing buffer, and then allocate the new one. * While we are at it, make the new buffer bigger than necessary * in order to reduce the number of reallocations. */ len += 100; if (buflen) { buflen = 0; free(bufptr); } hp = (HALF *) malloc(len * sizeof(HALF)); if (hp == NULL) math_error("No memory for temp buffer"); bufptr = hp; buflen = len; return hp; } /* END CODE */